library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggmosaic)
library(gridExtra)
library(productplots)
##
## Attaching package: 'productplots'
## The following objects are masked from 'package:ggmosaic':
##
## ddecker, happy, hbar, hspine, mosaic, prodcalc, spine, vbar,
## vspine
library(reshape2)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
1. Read the class data and plot it interactively with plotly
# Read the class survey table; the charts below use its Year and Program
# columns.
dat <- read.table(
  "https://raw.githubusercontent.com/bcaffo/ds4bme/master/data/classInterests.txt",
  header = TRUE
)

# A bar needs a numeric height. Mapping one categorical column to x and the
# other to y gives bars whose height is a category label, so tabulate the
# Year/Program combinations first and plot the row counts (column `n`).
class_counts <- dplyr::count(dat, Year, Program)

# Rows per Year, each bar split by Program.
plot_ly(
  data = class_counts,
  x = ~Year,
  y = ~n,
  color = ~Program,
  type = "bar"
) %>%
  layout(
    title = "Year of Class",
    barmode = "stack",
    yaxis = list(title = "Count")
  )

# Rows per Program, each bar split by Year.
plot_ly(
  data = class_counts,
  x = ~Program,
  y = ~n,
  color = ~Year,
  type = "bar"
) %>%
  layout(
    title = "Program of Class",
    barmode = "stack",
    yaxis = list(title = "Count")
  )
2. Mosaic plot of the class data
# Read the class survey table used for the mosaic plot.
dat <- read.table(
  "https://raw.githubusercontent.com/bcaffo/ds4bme/master/data/classInterests.txt",
  header = TRUE
)

# Impose an explicit ordering on Year (original note: this makes the axis
# labels clearer). Any Year value not listed here becomes NA; geom_mosaic()
# is called with na.rm = TRUE below.
dat$Year <- factor(
  dat$Year,
  levels = c("Master's", "PhD", "Sophomore", "Senior", "Junior")
)

# Mosaic of Year (x) against Program (fill), with a centred title and
# slanted x-axis tick labels.
plt <- ggplot(data = dat) +
  geom_mosaic(aes(x = product(Year), fill = Program), na.rm = TRUE) +
  ggtitle("Mosaic plot of Year and Program") +
  labs(x = "Year", y = "Program") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 60, vjust = 0.8)
  )

# Convert the static ggplot into an interactive widget.
ggplotly(plt)
3. Plot of the relationship between year and US healthcare spending
# Read the spending file without a header so rows can be addressed by
# position. stringsAsFactors = FALSE keeps the cells as plain text, so the
# numeric conversion below behaves the same on every R version.
data <- read.csv(
  "https://raw.githubusercontent.com/jhu-advdatasci/2018/master/data/KFF/healthcare-spending.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

# Drop rows 1-4 and 56-64; the rows that remain are used as one record per
# state. NOTE(review): presumably title/header/national-total rows and
# trailing footnotes -- confirm against the file.
cdata <- data[-c(1:4, 56:64), ]

# First column holds the state label, the other 24 are relabelled 1991-2014.
colnames(cdata) <- c("States", 1991:2014)
rownames(cdata) <- NULL

# Wide -> long: one row per (state, year). pivot_longer() replaces the
# superseded gather(); rows come out grouped by state with years ascending.
d <- pivot_longer(
  cdata,
  cols = -States,
  names_to = "year",
  values_to = "spending"
)
d$year <- as.numeric(d$year)
d$spending <- as.numeric(d$spending)

# One line per state, spending over time.
plot_ly(
  d,
  x = ~year,
  y = ~spending,
  color = ~States,
  type = "scatter",
  mode = "lines"
) %>%
  layout(title = "Healthcare Spending versus Time, color coded by States")
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
4. Plot average healthcare spending by state
# Read the spending file without a header and hold it as a matrix so rows and
# columns can be addressed by position.
data <- as.matrix(
  read.csv(
    "https://raw.githubusercontent.com/jhu-advdatasci/2018/master/data/KFF/healthcare-spending.csv",
    header = FALSE
  )
)

# Drop rows 1-4 and 56-64; the rows that remain are used as one record per
# state. NOTE(review): presumably title/header/national-total rows and
# trailing footnotes -- confirm against the file.
state_rows <- data[-c(1:4, 56:64), , drop = FALSE]

# Column 1 is the state label; the remaining columns are yearly spending.
states <- state_rows[, 1]

# Convert the spending cells to numbers in one vectorized call and restore the
# matrix shape (one row per state, one column per year).
ndata <- matrix(as.numeric(state_rows[, -1]), nrow = nrow(state_rows))
colnames(ndata) <- 1991:2014

# Per-state average over the yearly columns. Stored under its own name so the
# workspace does not end up with a variable called `mean` shadowing
# base::mean().
state_means <- rowMeans(ndata)

# Build the plotting table directly, keeping the means numeric instead of
# passing them through a character matrix and converting back.
data_f <- data.frame(
  states = states,
  mean = state_means,
  row.names = states,
  stringsAsFactors = FALSE
)

# y-axis settings passed to layout().
y <- list(title = "Spending")

plot_ly(data_f, x = ~states, y = ~mean, type = "bar") %>%
  layout(title = "Average Health Care Spending by State", yaxis = y)